/*This do file creates master data set used to estimate product space demand systems for motor oils in "An Evaluation of Merger Simulations".  
We consider the market for nationally branded conventional motor oils, which include Castrol GTX, Havoline F3, Mobil, Pennzoil, Quaker State,
Valvoline Multi Viscosity, and Private label products aggregated together.  Weights are restricted to 10w30*/

#delimit;
clear all;
set matsize 1000;
cap set mem 300m;
cap log close;
set more off;
local path "c:/data/restat_12_9/raw/";
local outpath "c:/data/restat_12_9/analysis_data/";

di "`path'";

use "`path'oilagg";



rename region REGION;
rename brand BRAND;
rename weight WEIGHT;
rename type TYPE;
rename week WEEK;
rename index INDEX;
rename nweek NWEEK;
rename keep KEEP;

drop if REGION=="";
keep if TYPE=="conventional";
keep if WEIGHT=="10W30";

drop WEIGHT TYPE WEEK INDEX NWEEK KEEP TYPE;



sort year month;
/*focus on nationally branded oils*/
keep if BRAND=="CASTROL GT X"|BRAND=="HAVOLINE FORMULA 3"|BRAND=="MOBIL"|BRAND=="PENNZOIL"|BRAND=="PRIVATE LABEL"|BRAND=="QUAKER STATE"|BRAND=="VALVOLINE MULTI VISCOSITY";

/*eliminate spaces in brand string values so reshape works*/
replace BRAND="CASTROL" if BRAND=="CASTROL GT X";
replace BRAND="HAVOLINE" if BRAND=="HAVOLINE FORMULA 3";
replace BRAND="QUAKER" if BRAND=="QUAKER STATE";
replace BRAND="PRIVATE" if BRAND=="PRIVATE LABEL";
replace BRAND="VALVOLINE" if BRAND=="VALVOLINE MULTI VISCOSITY";



/*number of brands*/
local nbrands=7;

/*create a numeric time variable*/
destring year, replace;
replace year=year*10000;
destring month, replace;
replace month=month*100;
destring day, replace;
gen t=year+month+day;


/*rectangularize dataset, there are two missing observations for private label products in baltimore/washington for some reason.  i 
impute sales and volume as the values of the preceeding week in that region*/
fillin BRAND REGION t;
replace month=600 if t==19970628 & REGION=="BALTIMORE/WASHGTON";
replace month=700 if t==19970728 & REGION=="BALTIMORE/WASHGTON";

replace day=28 if t==19970628 & REGION=="BALTIMORE/WASHGTON";
replace day=28 if t==19970728 & REGION=="BALTIMORE/WASHGTON";

replace year=19970000 if t==19970628 & REGION=="BALTIMORE/WASHGTON";
replace year=19970000 if t==19970728 & REGION=="BALTIMORE/WASHGTON";
sort BRAND REGION t;
replace sale=sale[_n-1] if _fillin==1;
replace vol=vol[_n-1] if _fillin==1;

sort REGION BRAND t;
replace date=date[_n-1] if _fillin==1;

gen merged=t>=19981200;
keep if merged==0;


/*generate average price for each brand of oil*/
gen price=sale/vol;
gen lprice=log(price);


/*rename REGIONS so reshape won't give variable names that are too long*/
replace REGION="WASHINGMORE" if REGION=="BALTIMORE/WASHGTON";
replace REGION="DALLAS" if REGION=="DALLAS/FT WORTH";
replace REGION="LA" if REGION=="LOS ANGELES";
replace REGION="MINN" if REGION=="MINNEAPOLS/ST PAUL";
replace REGION="NY" if REGION=="NEW YORK (NEW)";
replace REGION="PHOENIX" if REGION=="PHOENIX/TUCSON";
replace REGION="SAND" if REGION=="SAN DIEGO";
replace REGION="SF" if REGION=="SAN FRANCSCO/OAKLD";

/*gen total sales for each market defined by region and time*/
bysort REGION t: egen exp=total(sale);
gen lexp=log(exp);
/*revenue share*/
gen share=sale/exp;
sort t BRAND;
save otemp, replace;





local list "CASTROL HAVOLINE MOBIL PENNZOIL PRIVATE QUAKER VALVOLINE";

/*generate the sum of each brands price and log of price across other regions, these are hausman instruments*/
drop sale vol day month year date _fillin merged exp lexp share;
reshape wide lprice price, i(t BRAND) j(REGION) string;
sort t BRAND;
merge t BRAND using otemp;
tab _merge;
drop _merge;
gen ivprice=0;
gen livprice=0;
replace ivprice=(priceNY+priceCHICAGO+priceMINN+priceDALLAS)/4 if REGION=="WASHINGMORE";
replace ivprice=(priceMINN+priceNY+priceDALLAS+priceWASHINGMORE)/4 if REGION=="CHICAGO";
replace ivprice=(priceHOUSTON+priceCHICAGO+priceMINN+pricePHOENIX)/4 if REGION=="DALLAS";
replace ivprice=(priceCHICAGO+priceDALLAS+priceMINN+pricePHOENIX)/4 if REGION=="HOUSTON";
replace ivprice=(priceSAND+pricePHOENIX+priceSF+priceDALLAS)/4 if REGION=="LA";
replace ivprice=(priceCHICAGO+priceNY+priceDALLAS+pricePHOENIX)/4 if REGION=="MINN";
replace ivprice=(priceWASHINGMORE+priceCHICAGO+priceMINN+priceDALLAS)/4 if REGION=="NY";
replace ivprice=(priceLA+priceSAND+priceHOUSTON+priceDALLAS)/4 if REGION=="PHOENIX";
replace ivprice=(priceSF+priceLA+pricePHOENIX+priceDALLAS)/4 if REGION=="SAND";
replace ivprice=(priceLA+priceSAND+pricePHOENIX+priceDALLAS)/4 if REGION=="SF";

replace livprice=(lpriceNY+lpriceCHICAGO+lpriceMINN+lpriceDALLAS)/4 if REGION=="WASHINGMORE";
replace livprice=(lpriceMINN+lpriceNY+lpriceDALLAS+lpriceWASHINGMORE)/4 if REGION=="CHICAGO";
replace livprice=(lpriceHOUSTON+lpriceCHICAGO+lpriceMINN+lpricePHOENIX)/4 if REGION=="DALLAS";
replace livprice=(lpriceCHICAGO+lpriceDALLAS+lpriceMINN+lpricePHOENIX)/4 if REGION=="HOUSTON";
replace livprice=(lpriceSAND+lpricePHOENIX+lpriceSF+lpriceDALLAS)/4 if REGION=="LA";
replace livprice=(lpriceCHICAGO+lpriceNY+lpriceDALLAS+lpricePHOENIX)/4 if REGION=="MINN";
replace livprice=(lpriceWASHINGMORE+lpriceCHICAGO+lpriceMINN+lpriceDALLAS)/4 if REGION=="NY";
replace livprice=(lpriceLA+lpriceSAND+lpriceHOUSTON+lpriceDALLAS)/4 if REGION=="PHOENIX";
replace livprice=(lpriceSF+lpriceLA+lpricePHOENIX+lpriceDALLAS)/4 if REGION=="SAND";
replace livprice=(lpriceLA+lpriceSAND+lpricePHOENIX+lpriceDALLAS)/4 if REGION=="SF";





/*reshape wide with stubs brand to have prices of rivals as variables*/
sort REGION t;
save otemp, replace;


drop year vol sale exp share month day merged lexp priceCHICAGO-lpriceWASHINGMORE _fillin date;
reshape wide lprice price livprice ivprice, i(t REGION) j(BRAND) string;
sort REGION t;
merge REGION t using otemp;
drop _merge priceCHICAGO-lpriceWASHINGMORE;
local list "CASTROL HAVOLINE MOBIL PENNZOIL PRIVATE QUAKER VALVOLINE";



/*Constructs "expenditure" variables, which are region-time total expenditures deflated by a stone logarithmic price index with 
region specific shares averaged over time for the aids system 
and total expenditures for the linear system and log of total expenditures for the log-linear systems.*/


/*this creates scalars that are average shares by brand and region*/
qui tab BRAND, gen(bra);
forval x=1/7{;
	replace bra`x'=0 if bra`x'==.;
};

qui tab BRAND, gen(brand);
qui tab REGION, gen(reg);
local nregions=10;



/*total revenue share*/
bysort BRAND REGION: egen tsaleall=total(exp);
bysort BRAND REGION: egen bsale=total(sale);
gen avgshare=bsale/tsaleall;


forval x=1/`nregions'{;
    forval y=1/`nbrands'{;
        qui sum avgshare if reg`x'==1 & brand`y'==1;
        scalar w`x'b`y'all=r(mean);
    };
};
drop tsaleall avgshare bsale;

gen indexall=0;
forval x=1/`nregions'{;
    replace indexall=w`x'b1all*lpriceCASTROL+				 
				  w`x'b2all*lpriceHAVOLINE+				  
				  w`x'b3all*lpriceMOBIL+
				  w`x'b4all*lpricePENNZOIL+
				  w`x'b5all*lpricePRIVATE+				 
				  w`x'b6all*lpriceQUAKER+				  
				  w`x'b7all*lpriceVALVOLINE				 
				   if reg`x'==1;
};
gen laidp=lexp-indexall;





gen lin_indexall=0;
forval x=1/`nregions'{;
    replace lin_indexall=w`x'b1all*priceCASTROL+				 
				  w`x'b2all*priceHAVOLINE+				  
				  w`x'b3all*priceMOBIL+
				  w`x'b4all*pricePENNZOIL+
				  w`x'b5all*pricePRIVATE+				 
				  w`x'b6all*priceQUAKER+				  
				  w`x'b7all*priceVALVOLINE			
				   if reg`x'==1;
};
/*Create Instruments for index and deflated expenditure terms*/
sort t;
save temp, replace;


drop   BRAND-lprice share-reg10 priceCASTROL-livpriceVALVOLINE;
duplicates drop;
reshape wide exp laidp indexall lin_indexall lexp, i(t) j(REGION) string;
sort t;
merge t using temp;
#delimit;


drop _merge;


gen ivlaidp=0;
gen ivlexp=0;
gen ivindexall=0;
gen ivlin_indexall=0;
gen ivexp=0;


replace ivexp=(expNY+expCHICAGO+expMINN+expDALLAS)/4 if REGION=="WASHINGMORE";
replace ivexp=(expMINN+expNY+expDALLAS+expWASHINGMORE)/4 if REGION=="CHICAGO";
replace ivexp=(expHOUSTON+expCHICAGO+expMINN+expPHOENIX)/4 if REGION=="DALLAS";
replace ivexp=(expCHICAGO+expDALLAS+expMINN+expPHOENIX)/4 if REGION=="HOUSTON";
replace ivexp=(expSAND+expPHOENIX+expSF+expDALLAS)/4 if REGION=="LA";
replace ivexp=(expCHICAGO+expNY+expDALLAS+expPHOENIX)/4 if REGION=="MINN";
replace ivexp=(expWASHINGMORE+expCHICAGO+expMINN+expDALLAS)/4 if REGION=="NY";
replace ivexp=(expLA+expSAND+expHOUSTON+expDALLAS)/4 if REGION=="PHOENIX";
replace ivexp=(expSF+expLA+expPHOENIX+expDALLAS)/4 if REGION=="SAND";
replace ivexp=(expLA+expSAND+expPHOENIX+expDALLAS)/4 if REGION=="SF";


replace ivindexall=(indexallNY+indexallCHICAGO+indexallMINN+indexallDALLAS)/4 if REGION=="WASHINGMORE";
replace ivindexall=(indexallMINN+indexallNY+indexallDALLAS+indexallWASHINGMORE)/4 if REGION=="CHICAGO";
replace ivindexall=(indexallHOUSTON+indexallCHICAGO+indexallMINN+indexallPHOENIX)/4 if REGION=="DALLAS";
replace ivindexall=(indexallCHICAGO+indexallDALLAS+indexallMINN+indexallPHOENIX)/4 if REGION=="HOUSTON";
replace ivindexall=(indexallSAND+indexallPHOENIX+indexallSF+indexallDALLAS)/4 if REGION=="LA";
replace ivindexall=(indexallCHICAGO+indexallNY+indexallDALLAS+indexallPHOENIX)/4 if REGION=="MINN";
replace ivindexall=(indexallWASHINGMORE+indexallCHICAGO+indexallMINN+indexallDALLAS)/4 if REGION=="NY";
replace ivindexall=(indexallLA+indexallSAND+indexallHOUSTON+indexallDALLAS)/4 if REGION=="PHOENIX";
replace ivindexall=(indexallSF+indexallLA+indexallPHOENIX+indexallDALLAS)/4 if REGION=="SAND";
replace ivindexall=(indexallLA+indexallSAND+indexallPHOENIX+indexallDALLAS)/4 if REGION=="SF";

replace ivlin_indexall=(lin_indexallNY+lin_indexallCHICAGO+lin_indexallMINN+lin_indexallDALLAS)/4 if REGION=="WASHINGMORE";
replace ivlin_indexall=(lin_indexallMINN+lin_indexallNY+lin_indexallDALLAS+lin_indexallWASHINGMORE)/4 if REGION=="CHICAGO";
replace ivlin_indexall=(lin_indexallHOUSTON+lin_indexallCHICAGO+lin_indexallMINN+lin_indexallPHOENIX)/4 if REGION=="DALLAS";
replace ivlin_indexall=(lin_indexallCHICAGO+lin_indexallDALLAS+lin_indexallMINN+lin_indexallPHOENIX)/4 if REGION=="HOUSTON";
replace ivlin_indexall=(lin_indexallSAND+lin_indexallPHOENIX+lin_indexallSF+lin_indexallDALLAS)/4 if REGION=="LA";
replace ivlin_indexall=(lin_indexallCHICAGO+lin_indexallNY+lin_indexallDALLAS+lin_indexallPHOENIX)/4 if REGION=="MINN";
replace ivlin_indexall=(lin_indexallWASHINGMORE+lin_indexallCHICAGO+lin_indexallMINN+lin_indexallDALLAS)/4 if REGION=="NY";
replace ivlin_indexall=(lin_indexallLA+lin_indexallSAND+lin_indexallHOUSTON+lin_indexallDALLAS)/4 if REGION=="PHOENIX";
replace ivlin_indexall=(lin_indexallSF+lin_indexallLA+lin_indexallPHOENIX+lin_indexallDALLAS)/4 if REGION=="SAND";
replace ivlin_indexall=(lin_indexallLA+lin_indexallSAND+lin_indexallPHOENIX+lin_indexallDALLAS)/4 if REGION=="SF";


replace ivexp=(expNY+expCHICAGO+expMINN+expDALLAS)/4 if REGION=="WASHINGMORE";
replace ivexp=(expMINN+expNY+expDALLAS+expWASHINGMORE)/4 if REGION=="CHICAGO";
replace ivexp=(expHOUSTON+expCHICAGO+expMINN+expPHOENIX)/4 if REGION=="DALLAS";
replace ivexp=(expCHICAGO+expDALLAS+expMINN+expPHOENIX)/4 if REGION=="HOUSTON";
replace ivexp=(expSAND+expPHOENIX+expSF+expDALLAS)/4 if REGION=="LA";
replace ivexp=(expCHICAGO+expNY+expDALLAS+expPHOENIX)/4 if REGION=="MINN";
replace ivexp=(expWASHINGMORE+expCHICAGO+expMINN+expDALLAS)/4 if REGION=="NY";
replace ivexp=(expLA+expSAND+expHOUSTON+expDALLAS)/4 if REGION=="PHOENIX";
replace ivexp=(expSF+expLA+expPHOENIX+expDALLAS)/4 if REGION=="SAND";
replace ivexp=(expLA+expSAND+expPHOENIX+expDALLAS)/4 if REGION=="SF";


replace ivlaidp=(laidpNY+laidpCHICAGO+laidpMINN+laidpDALLAS)/4 if REGION=="WASHINGMORE";
replace ivlaidp=(laidpMINN+laidpNY+laidpDALLAS+laidpWASHINGMORE)/4 if REGION=="CHICAGO";
replace ivlaidp=(laidpHOUSTON+laidpCHICAGO+laidpMINN+laidpPHOENIX)/4 if REGION=="DALLAS";
replace ivlaidp=(laidpCHICAGO+laidpDALLAS+laidpMINN+laidpPHOENIX)/4 if REGION=="HOUSTON";
replace ivlaidp=(laidpSAND+laidpPHOENIX+laidpSF+laidpDALLAS)/4 if REGION=="LA";
replace ivlaidp=(laidpCHICAGO+laidpNY+laidpDALLAS+laidpPHOENIX)/4 if REGION=="MINN";
replace ivlaidp=(laidpWASHINGMORE+laidpCHICAGO+laidpMINN+laidpDALLAS)/4 if REGION=="NY";
replace ivlaidp=(laidpLA+laidpSAND+laidpHOUSTON+laidpDALLAS)/4 if REGION=="PHOENIX";
replace ivlaidp=(laidpSF+laidpLA+laidpPHOENIX+laidpDALLAS)/4 if REGION=="SAND";
replace ivlaidp=(laidpLA+laidpSAND+laidpPHOENIX+laidpDALLAS)/4 if REGION=="SF";

replace ivlexp=(lexpNY+lexpCHICAGO+lexpMINN+lexpDALLAS)/4 if REGION=="WASHINGMORE";
replace ivlexp=(lexpMINN+lexpNY+lexpDALLAS+lexpWASHINGMORE)/4 if REGION=="CHICAGO";
replace ivlexp=(lexpHOUSTON+lexpCHICAGO+lexpMINN+lexpPHOENIX)/4 if REGION=="DALLAS";
replace ivlexp=(lexpCHICAGO+lexpDALLAS+lexpMINN+lexpPHOENIX)/4 if REGION=="HOUSTON";
replace ivlexp=(lexpSAND+lexpPHOENIX+lexpSF+lexpDALLAS)/4 if REGION=="LA";
replace ivlexp=(lexpCHICAGO+lexpNY+lexpDALLAS+lexpPHOENIX)/4 if REGION=="MINN";
replace ivlexp=(lexpWASHINGMORE+lexpCHICAGO+lexpMINN+lexpDALLAS)/4 if REGION=="NY";
replace ivlexp=(lexpLA+lexpSAND+lexpHOUSTON+lexpDALLAS)/4 if REGION=="PHOENIX";
replace ivlexp=(lexpSF+lexpLA+lexpPHOENIX+lexpDALLAS)/4 if REGION=="SAND";
replace ivlexp=(lexpLA+lexpSAND+lexpPHOENIX+lexpDALLAS)/4 if REGION=="SF";







forval x=1/`nbrands'{;
	foreach y of local list{;
		gen lprice_`x'_`y'=bra`x'*lprice`y';
		gen price_`x'_`y'=bra`x'*price`y';
		gen ivlprice_`x'_`y'=bra`x'*livprice`y';
		gen ivprice_`x'_`y'=bra`x'*ivprice`y';
	};
};


sort REGION month year day;
save otemp, replace;


use "`path'/oilpop", clear;
keep if YEAR==1997|YEAR==1998;
drop if YEAR==1998&MONTH>11;
duplicates drop;

replace REGION="WASHINGMORE" if REGION=="BALTIMORE/WASHGTON";
replace REGION="DALLAS" if REGION=="DALLAS/FT WORTH";
replace REGION="LA" if REGION=="LOS ANGELES";
replace REGION="MINN" if REGION=="MINNEAPOLS/ST PAUL";
replace REGION="NY" if REGION=="NEW YORK (NEW)";
replace REGION="PHOENIX" if REGION=="PHOENIX/TUCSON";
replace REGION="SAND" if REGION=="SAN DIEGO";
replace REGION="SF" if REGION=="SAN FRANCSCO/OAKLD";



rename MONTH month;
rename YEAR year;
*rename DAY day;
destring year, replace;
replace year=year*10000;
destring month, replace;
replace month=month*100;
sort REGION month year;
recast double year month;
*drop day T WEEK;
duplicates drop;
merge 1:m REGION month year using otemp;


drop if _merge==1;
drop _merge;

#delimit;
gen wexp=exp/POP;
sort t;
save temp, replace;



drop month year POP expCHICAGO-ivprice_7_VALVOLINE;

duplicates drop;
reshape wide wexp, i(t) j(REGION) string;
sort t;
merge t using temp;
tab _merge;
#delimit;
drop _merge;


gen ivwexp=0;
replace ivwexp=(wexpNY+wexpCHICAGO+wexpMINN+wexpDALLAS)/4 if REGION=="WASHINGMORE";
replace ivwexp=(wexpMINN+wexpNY+wexpDALLAS+wexpWASHINGMORE)/4 if REGION=="CHICAGO";
replace ivwexp=(wexpHOUSTON+wexpCHICAGO+wexpMINN+wexpPHOENIX)/4 if REGION=="DALLAS";
replace ivwexp=(wexpCHICAGO+wexpDALLAS+wexpMINN+wexpPHOENIX)/4 if REGION=="HOUSTON";
replace ivwexp=(wexpSAND+wexpPHOENIX+wexpSF+wexpDALLAS)/4 if REGION=="LA";
replace ivwexp=(wexpCHICAGO+wexpNY+wexpDALLAS+wexpPHOENIX)/4 if REGION=="MINN";
replace ivwexp=(wexpWASHINGMORE+wexpCHICAGO+wexpMINN+wexpDALLAS)/4 if REGION=="NY";
replace ivwexp=(wexpLA+wexpSAND+wexpHOUSTON+wexpDALLAS)/4 if REGION=="PHOENIX";
replace ivwexp=(wexpSF+wexpLA+wexpPHOENIX+wexpDALLAS)/4 if REGION=="SAND";
replace ivwexp=(wexpLA+wexpSAND+wexpPHOENIX+wexpDALLAS)/4 if REGION=="SF";





#delimit;
drop wexpCHICAGO-wexpWASHINGMORE expCHICAGO-livpriceVALVOLINE _fillin merged ivprice livprice;

forval x=1/`nbrands'{;
	gen ivwexp`x'=ivwexp*bra`x';
};




save "`outpath'oilmaster", replace;

rm otemp.dta;
rm temp.dta;


